package main

import (
	"bytes"
	"flag"
	"fmt"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"strings"

	"golang.org/x/text/encoding/charmap"
	"golang.org/x/text/encoding/japanese"

	"golang.org/x/text/encoding/traditionalchinese"

	"golang.org/x/text/encoding"

	"github.com/saintfish/chardet"
	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/transform"
)

// Command-line flags. They are read in main after flag.Parse.
var (
	// flagDir is the root directory to scan; main resolves it to an absolute path.
	flagDir          = flag.String("d", ".", "work dir, default=.")
	// flagExtend is a comma-separated list of file extensions; each entry is
	// matched against file names as the pattern "*.<entry>".
	flagExtend       = flag.String("e", "*", "file extends, default=*")
	// flagWhiteListDir is a comma-separated list of directories, joined onto
	// the work dir. When non-empty, only paths in these directories are handled
	// and the black list is not consulted.
	flagWhiteListDir = flag.String("w", "", "dirs in white list, default is empty")
	// flagBlackListDir is a comma-separated list of directories, joined onto
	// the work dir, whose contents are skipped.
	flagBlackListDir = flag.String("b", "", "dirs in black list, default is empty")
)

// Package-level lookup tables. The first three are filled in by main from the
// command-line flags; gUtfCharset is fixed.
var (
	// gFileExtend is the set of file extensions to process. Each key is used
	// as the suffix of a "*.<key>" file name pattern.
	gFileExtend = make(map[string]struct{})

	// gDirsInWhiteList maps a white-listed directory, as written on the
	// command line, to that directory joined onto the absolute work dir.
	gDirsInWhiteList = make(map[string]string)

	// gDirsInBlackList maps a black-listed directory, as written on the
	// command line, to that directory joined onto the absolute work dir.
	gDirsInBlackList = make(map[string]string)

	// gUtfCharset is the set of detected charset names for which a file is
	// left untouched.
	// NOTE(review): the ISO-8859-x entries are presumably here because plain
	// ASCII files get reported under those names — confirm.
	gUtfCharset = map[string]struct{}{
		"UTF-8":      {},
		"ISO-8859-1": {},
		"ISO-8859-9": {},
	}
)
// gDecoderFactory maps a detected charset name to a constructor for the
// decoder that converts content in that charset to UTF-8. A fresh decoder is
// created on every call, so decoders are never shared between files.
var gDecoderFactory = map[string]func() *encoding.Decoder{
	"GB-18030": func() *encoding.Decoder {
		// GB18030 extends GBK with four-byte sequences, which the GBK decoder
		// cannot decode.
		return simplifiedchinese.GB18030.NewDecoder()
	},
	"Big5": func() *encoding.Decoder {
		return traditionalchinese.Big5.NewDecoder()
	},
	"windows-1252": func() *encoding.Decoder {
		return charmap.Windows1252.NewDecoder()
	},
	"EUC-JP": func() *encoding.Decoder {
		// Previously wired to Windows-1252, which turned Japanese text into
		// mojibake.
		return japanese.EUCJP.NewDecoder()
	},
	"Shift_JIS": func() *encoding.Decoder {
		return japanese.ShiftJIS.NewDecoder()
	},
	"EUC-KR": func() *encoding.Decoder {
		// NOTE(review): EUC-KR content is decoded as GBK. The real decoder,
		// korean.EUCKR, was left commented out here — presumably because GBK
		// files get misreported as EUC-KR; confirm. Switching requires
		// importing golang.org/x/text/encoding/korean.
		return simplifiedchinese.GBK.NewDecoder()
	},
	"IBM420_rtl": func() *encoding.Decoder {
		// NOTE(review): decoded as GBK — presumably a workaround for GBK
		// files being misreported under this name; confirm.
		return simplifiedchinese.GBK.NewDecoder()
	},
}

// isDirInWhiteList reports whether path is one of the directories stored in
// gDirsInWhiteList or lies beneath one of them.
//
// Matching is done on whole path components: a white-listed ".../src" matches
// ".../src" and ".../src/a.txt" but not ".../src2/a.txt". A plain substring
// test would accept the latter.
//
// path and the stored directories are expected to be absolute, cleaned paths.
func isDirInWhiteList(path string) bool {
	sep := string(filepath.Separator)
	for _, d := range gDirsInWhiteList {
		if path == d {
			return true
		}
		// Only append the separator when d does not already end with one
		// (e.g. a filesystem root), so the prefix never contains a doubled
		// separator.
		prefix := d
		if !strings.HasSuffix(prefix, sep) {
			prefix += sep
		}
		if strings.HasPrefix(path, prefix) {
			return true
		}
	}
	return false
}

// isDirInBlackList reports whether path is one of the directories stored in
// gDirsInBlackList or lies beneath one of them.
//
// Matching is done on whole path components: a black-listed ".../vendor"
// matches ".../vendor" and ".../vendor/a.txt" but not ".../vendor2/a.txt". A
// plain substring test would accept the latter.
//
// path and the stored directories are expected to be absolute, cleaned paths.
func isDirInBlackList(path string) bool {
	sep := string(filepath.Separator)
	for _, d := range gDirsInBlackList {
		if path == d {
			return true
		}
		// Only append the separator when d does not already end with one
		// (e.g. a filesystem root), so the prefix never contains a doubled
		// separator.
		prefix := d
		if !strings.HasSuffix(prefix, sep) {
			prefix += sep
		}
		if strings.HasPrefix(path, prefix) {
			return true
		}
	}
	return false
}

// isFileMatched reports whether fileName matches the pattern "*.<ext>" for at
// least one extension ext in gFileExtend. Extensions that produce a malformed
// pattern are ignored.
func isFileMatched(fileName string) bool {
	for ext := range gFileExtend {
		if ok, err := filepath.Match("*."+ext, fileName); err == nil && ok {
			return true
		}
	}
	return false
}

// main parses the command-line flags, walks the work directory and rewrites,
// in place, every selected file whose detected charset is not already in
// gUtfCharset, decoding it with the matching entry of gDecoderFactory.
//
// Selection rules:
//   - the file name must match one of the extensions given with -e;
//   - when a white list is given, only paths accepted by isDirInWhiteList are
//     handled and the black list is not consulted;
//   - otherwise paths accepted by isDirInBlackList are skipped.
//
// The first failure (unreadable file, undetectable or unsupported charset,
// decode or write error) aborts the run with a non-zero exit status.
func main() {
	flag.Parse()

	if *flagExtend != "" {
		for _, e := range strings.Split(*flagExtend, ",") {
			gFileExtend[e] = struct{}{}
		}
	}

	absWorkDir, err := filepath.Abs(*flagDir)
	if err != nil {
		log.Fatalf("resolve work dir [%s] fail, %v", *flagDir, err)
	}

	// White/black list entries are interpreted relative to the work dir.
	if *flagWhiteListDir != "" {
		for _, d := range strings.Split(*flagWhiteListDir, ",") {
			gDirsInWhiteList[d] = filepath.Join(absWorkDir, d)
		}
	}

	if *flagBlackListDir != "" {
		for _, d := range strings.Split(*flagBlackListDir, ",") {
			gDirsInBlackList[d] = filepath.Join(absWorkDir, d)
		}
	}

	fmt.Printf("scan dir [%s]\n", absWorkDir)

	err = filepath.Walk(*flagDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		absFilePath, err := filepath.Abs(path)
		if err != nil {
			return fmt.Errorf("resolve [%s] fail, %v", path, err)
		}
		if absFilePath == absWorkDir {
			// Skip the work directory itself.
			return nil
		}

		if len(gDirsInWhiteList) > 0 {
			// Do not prune here: a white-listed directory may be nested
			// inside a directory that is not itself white-listed.
			if !isDirInWhiteList(absFilePath) {
				return nil
			}
		} else if len(gDirsInBlackList) > 0 && isDirInBlackList(absFilePath) {
			if info.IsDir() {
				// Nothing beneath a black-listed directory is wanted.
				return filepath.SkipDir
			}
			return nil
		}

		// Only files with one of the requested extensions are converted. A
		// directory whose name happens to match must not be read as a file.
		if info.IsDir() || !isFileMatched(info.Name()) {
			return nil
		}

		converted, err := convertFileToUTF8(absFilePath)
		if err != nil {
			return err
		}
		if converted {
			fmt.Printf("-> %s\n", absFilePath)
		}
		return nil
	})
	if err != nil {
		log.Fatalf("scan dir [%s] fail, %v", absWorkDir, err)
	}
}

// convertFileToUTF8 detects the charset of the file at absFilePath and, unless
// the file is empty or its charset is listed in gUtfCharset, overwrites it
// with its content decoded to UTF-8. It reports whether the file was
// rewritten. A charset with no entry in gDecoderFactory is an error.
func convertFileToUTF8(absFilePath string) (bool, error) {
	content, err := ioutil.ReadFile(absFilePath)
	if err != nil {
		return false, fmt.Errorf("%v, absFilePath=[%s]", err, absFilePath)
	}
	if len(content) == 0 {
		// An empty file has nothing to detect or convert.
		return false, nil
	}

	result, err := chardet.NewTextDetector().DetectBest(content)
	if err != nil {
		return false, fmt.Errorf("detect [%s] charset fail, %v", absFilePath, err)
	}

	if _, ok := gUtfCharset[result.Charset]; ok {
		// Already treated as UTF-8; leave the file alone.
		return false, nil
	}

	newDecoder := gDecoderFactory[result.Charset]
	if newDecoder == nil {
		return false, fmt.Errorf("unknown charset=[%s] language=[%s] file=[%s]", result.Charset, result.Language, absFilePath)
	}

	newContent, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader(content), newDecoder()))
	if err != nil {
		return false, fmt.Errorf("decode [%s] as [%s] fail, %v", absFilePath, result.Charset, err)
	}

	// The file already exists, so the permission bits passed here are not
	// applied; WriteFile only truncates and rewrites it.
	if err := ioutil.WriteFile(absFilePath, newContent, 0666); err != nil {
		return false, fmt.Errorf("write [%s] fail, %v", absFilePath, err)
	}
	return true, nil
}
